import os
import sys
# Build the path to the project's 'code' directory: keep the current working
# directory up to and including its first component named exactly 'ipn', then
# append '/code'. list.index raises ValueError when no component equals 'ipn',
# so the script must be launched from inside an 'ipn' directory tree.
# NOTE(review): the path is split on '/', so this presumably assumes
# POSIX-style paths -- confirm before running on Windows.
path_codedir = '/'.join(os.getcwd().split('/')[:os.getcwd().split('/').index('ipn') + 1]) + '/code'
# Make modules under the 'code' directory importable (must happen before the
# `from init import ...` lines below).
sys.path.append(path_codedir)
from init import set_log
# Called before the module logger is created; presumably configures logging to
# write to 'main_log.txt' -- see init.set_log to confirm.
set_log('main_log.txt')
import logging
logger = logging.getLogger(__name__)
from init import PATHS
import subprocess



def _run_stage_a_auto_industry():
    """Stage A: construct a sample of firms that belong to the automotive industry."""
    logger.info('***********************************************************')
    logger.info('******************  A_AutoIndustry  ********************')
    logger.info('***********************************************************')

    # 1) Constructing OEMs.csv
    # As a starting point, we manually assemble a file called `OEMs.csv` using
    # data from Marklines to contain information about corporate structure
    # over time. To illustrate the structure of this file, we provide a mock
    # example under `private_data/Marklines/OEMs_mock.csv`.
    # OEM_Level1_ID is the identifier we create at the highest possible level.
    # OEM_Level2_ID is another identifier we create that is one level below,
    # e.g., Jaguar/Land Rover is a level 2 firm because it belongs to Ford Group.
    # We started with the list of names of OEMs as provided by Marklines and
    # manually matched them to firms in Orbis.

    # 2) Collecting Orbis ids and names of subsidiaries of these OEMs.
    # Note: the paths to Orbis files are set within a_find_oem_subsidiaries.py
    from A_AutoIndustry import a_find_oem_subsidiaries
    a_find_oem_subsidiaries.main()

    # Next, get firm names, ownership history and industry classification of
    # subsidiaries.
    from A_AutoIndustry import b_1_get_orbis_info_subsidiaries
    b_1_get_orbis_info_subsidiaries.main()

    # Combine OEMs and subsidiaries names and industry.
    # A sample of the file created by the script below is shown under
    # data_files/Data_outputted/A_AutoIndustry/OEM_and_Subsidiaries_mock.csv to
    # illustrate the data structure. It contains the names and IDs of the car
    # manufacturers and their subsidiaries, and the NAICS industry code of the
    # subsidiaries.
    from A_AutoIndustry import b_2_combine_subsidiary_info
    b_2_combine_subsidiary_info.main()

    # 3) Collecting primary and secondary NAICS code information for all Orbis
    # firms included in NAICS 336111.
    from A_AutoIndustry import c_cleaning_data_from_orbis_platform
    c_cleaning_data_from_orbis_platform.main()

    # 4) Clean up sales data.
    from A_AutoIndustry import d_sales
    d_sales.main()


def _run_stage_b_factset_variables():
    """Stage B: clean up Factset supplier data and merge it with the panel of OEMs."""
    logger.info('***********************************************************')
    logger.info('******************  B_FactsetVariables  ********************')
    logger.info('***********************************************************')

    # Find the OEMs in Factset (the script does a quick name matching).
    from B_FactsetVariables import a_adding_factset_ids
    a_adding_factset_ids.main()

    # Find suppliers of OEMs in Factset and export their NAICS code.
    from B_FactsetVariables import b_finding_suppliers
    b_finding_suppliers.main()

    # Output datasets with supply-chain variables at the OEM level,
    # OEM-supplier level, OEM-year level and OEM-supplier-year level.
    from B_FactsetVariables import c_firm_year_fct
    c_firm_year_fct.main()


def _run_stage_c_patent_variables():
    """Stage C: clean up data from PATSTAT."""
    logger.info('***********************************************************')
    logger.info('******************  C_PatentVariables  ********************')
    logger.info('***********************************************************')

    # First, clean PATSTAT citations - useful for later.
    from C_PatentVariables import a_cleancitations
    a_cleancitations.main()

    # Collect all CPC and IPC subgroups that we want to capture (i.e. energy).
    from C_PatentVariables import a_finding_all_CPCIPC_subgroups
    a_finding_all_CPCIPC_subgroups.main()

    # Collect patents of OEMs and aggregate at family level, and at the
    # firm-year level.
    from C_PatentVariables import b_patent_oems
    b_patent_oems.main()

    # Collect patents of suppliers and aggregate at family level.
    from C_PatentVariables import c_patent_suppliers
    c_patent_suppliers.main()

    # Collect patents with CPC and IPC in transportation and aggregate at
    # family level.
    from C_PatentVariables import d_patent_ipccpc_transportation
    d_patent_ipccpc_transportation.main()

    # Collect patents with NACE in motor industry and aggregate at family level.
    from C_PatentVariables import e_patent_nace_autoindustry
    e_patent_nace_autoindustry.main()

    # Output csv that provides info about the overlap between b, c, d and e.
    from C_PatentVariables import f_overlap_and_OtherFirms_in_ipc_cpc_transpo
    f_overlap_and_OtherFirms_in_ipc_cpc_transpo.main()

    # Count families of different types for each year.
    from C_PatentVariables import g_aggregating_families_yearlevel
    g_aggregating_families_yearlevel.main()

    # The remaining three steps carry no description in the original script;
    # see the respective modules for details.
    from C_PatentVariables import h_suppliers_patentcount_at_oem_level
    h_suppliers_patentcount_at_oem_level.main()

    from C_PatentVariables import i_classifying_forward_and_backward_citations
    i_classifying_forward_and_backward_citations.main()

    from C_PatentVariables import j_cross_sectoral_citations
    j_cross_sectoral_citations.main()


def _run_stage_d_policy():
    """Stage D: clean the IEA RD&D data and build the policy dataset."""
    logger.info('***********************************************************')
    logger.info('**********************  D_Policy  ***********************')
    logger.info('***********************************************************')

    # Clean IEA RD&D data.
    from D_Policy import a_iea_rdd
    a_iea_rdd.main()

    # MANUAL STEP!!
    # The script above outputs IEA_PPP_2018_recomputedbyMD.csv (which we are
    # not allowed to share since it contains IEA data).
    # We next combine that data with data in
    # Additional_R&D_Data_manuallycollected.xls (which we manually collected).
    # Doing so, we created the file RDD1995_2020.csv which is used in the next
    # script. A mock sample of that file is provided under
    # Data_outputted/D_Policy/RDD1995_2020_mock.csv to illustrate the data
    # structure.
    from D_Policy import b_policy_dataset
    b_policy_dataset.main()


def _run_stata_regressions():
    """Run the Stata do-file ``code/E_Analysis/d_policy_regressions.do``.

    The ``stata`` executable must be on the PATH; otherwise ``subprocess.call``
    raises ``FileNotFoundError``. A non-zero exit code is logged as an error
    but does not raise, so the pipeline still finishes as before.
    """
    dofile = PATHS.github / ('code/E_Analysis/d_policy_regressions.do')
    # Pass a plain string so the argument list contains only str items.
    cmd = ["stata", "do", str(dofile)]
    returncode = subprocess.call(cmd)
    if returncode != 0:
        logger.error('Stata exited with return code %s while running %s',
                     returncode, dofile)


def _run_stage_e_analysis():
    """Stage E: produce the figures and run the regressions."""
    logger.info('***********************************************************')
    logger.info('**********************  E_Analysis  ***********************')
    logger.info('***********************************************************')

    # Outputs the key figures shown in the manuscript.
    from E_Analysis import a_figures_maintext
    a_figures_maintext.main()

    from E_Analysis import b_othergraphs_batteries_trans_vs_nontr
    b_othergraphs_batteries_trans_vs_nontr.main()
    from E_Analysis import b_othergraphs_crosssectoral
    b_othergraphs_crosssectoral.main()
    from E_Analysis import b_othergraphs_OEMs
    b_othergraphs_OEMs.main()
    from E_Analysis import b_othergraphs_patents_familylevel
    b_othergraphs_patents_familylevel.main()
    from E_Analysis import b_othergraphs_policies
    b_othergraphs_policies.main()
    from E_Analysis import b_othergraphs_spillovers
    b_othergraphs_spillovers.main()
    from E_Analysis import b_othergraphs_suppliers
    b_othergraphs_suppliers.main()

    # Regressions: first assemble the regression datasets, then run Stata.
    from E_Analysis import c_combining_datasets_for_regressions
    c_combining_datasets_for_regressions.main()
    _run_stata_regressions()


def main():
    """Run the whole pipeline, stages A through E, in order.

    Each stage module is imported right before it is run, so a module is only
    loaded once every earlier step has completed. Progress banners are written
    through the module-level ``logger``.
    """
    _run_stage_a_auto_industry()
    _run_stage_b_factset_variables()
    _run_stage_c_patent_variables()
    _run_stage_d_policy()
    _run_stage_e_analysis()

    logger.info('THE END')


# Run the full pipeline. NOTE: this call is not guarded by
# `if __name__ == "__main__":`, so it executes on import as well as when the
# file is run as a script.
main()



